%matplotlib inline
import pandas as pd
import numpy as np
import yfinance as yf
import numba
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from IPython.display import set_matplotlib_formats
# Render inline notebook figures as SVG.
# NOTE(review): newer IPython releases appear to deprecate importing this
# helper from IPython.display in favour of matplotlib_inline.backend_inline -
# confirm against the installed version.
set_matplotlib_formats('svg')
# Default figure size as [width, height]; matplotlib reads figsize in inches.
mpl.rcParams['figure.figsize'] = [12, 8]
# Daily S&P 500 index history from Yahoo Finance, restricted to 1970 onwards.
data = yf.Ticker('^GSPC').history(period='max', auto_adjust=False)["1970":]
# Note that when using ^GSPC there are no dividends, and it is also not
# really directly tradeable - I'll do a continuous futures version at some
# point.
# The rest of the analysis assumes the non-stock allocation is in cash, but in
# reality you would have some gains from being in bonds as well.
# We add transaction costs further down this sheet.

# Simple daily returns P_t / P_{t-1} - 1 (the first row is NaN). Computed
# directly from the price ratio rather than via a log -> diff -> exp round
# trip; the two are mathematically identical.
rets = data['Adj Close'].div(data['Adj Close'].shift()).sub(1)
ma200 = data.Close.rolling(200).mean()

# The 200-day moving average (MA200) is a lagging, smoother version of the
# original data.
data.Close.plot(logy=True)
ma200.plot(logy=True);

# It can be used for betting on momentum, doing market timing by being long
# (1) when the close is above the MA200 and flat (0) otherwise.
# The signal is built as float and masked with `where`, so the warm-up rows
# where the MA200 is still undefined become NaN without assigning NaN into an
# integer Series (which relies on implicit upcasting that recent pandas
# versions deprecate).
position = (data.Close > ma200).astype(float).where(ma200.notna())
position = position.shift()  # Delay by one day
position.plot()
def plot_dd(rets):
    """Plot the compounded equity curve of ``rets`` with drawdowns shaded.

    ``rets`` holds per-period simple returns. The curve is the cumulative
    product of ``1 + rets``, drawn as a thin black line on a log y-axis. The
    gap between the curve and its running maximum is filled in translucent
    red, so every drawdown shows up as a shaded region.
    """
    equity = (1 + rets).cumprod()
    high_water_mark = equity.cummax()

    equity.plot(logy=True, color='black', lw=1)
    plt.fill_between(
        equity.index,
        equity,
        high_water_mark,
        alpha=0.5,
        facecolor='red',
    )
# Equity curve and drawdowns of the plain old SP500 (always invested)
plot_dd(rets)
# This is with the MA200 crossover timing - the returns are lower but the
# drawdowns are less deep
plot_dd(position * rets)
# Daily returns per strategy: 'beta' is always invested, 'ma200' scales each
# day's return by the (already delayed) timing position
strats = pd.DataFrame({
'beta': rets,
'ma200': (position * rets)
})
# Annualised geometric return, exp(250 * mean(log(1 + r))) - 1.
# Slightly worse returns for the timed strategy
strats.add(1).apply(np.log).mean().mul(250).apply(np.exp).sub(1)
# Drawdown = equity curve / its running maximum - 1, summarised per strategy
# as the worst ('min') and the average ('mean') value.
# Better worst and average drawdown for the timed strategy
(
strats
.add(1).cumprod()
.transform(lambda xs: xs / xs.cummax() - 1)
.agg(['min', 'mean'])
.T
)
# These results are all before accounting for trading costs!
# Turnover (sum of absolute daily position changes per calendar year) can be
# awfully high - 20x in some years! You are trading in and out of the market
# a lot.
# There are also some concerns about overfitting, because the result is
# sensitive to exactly when the crossings happen.
(
position.diff().abs()
.groupby(pd.Grouper(freq='Y')).sum()
.rename(index=lambda xs: xs.year)
.plot.bar()
)
# Let's smoothen the signal by acting slowly on it!
@numba.njit
def _smoothen(signal, nsteps):
    """Rate-limit a target signal so it moves one step of ``1 / nsteps`` at a time.

    ``signal`` is indexed as a 1-D array of target exposures (0 or 1 in this
    notebook). It is scaled to integer steps (``signal * nsteps``) and the
    returned position walks towards that target by exactly one step per
    element, so a full 0 -> 1 switch takes ``nsteps`` elements.

    The position is seeded at ``nsteps`` steps, i.e. an exposure of 1.0. NaN
    entries in ``signal`` compare false against everything, so the position
    is simply held on those elements.

    Returns a float array of the same length holding ``steps / nsteps``.
    """
    pos = np.zeros_like(signal, dtype=np.int64)
    target = signal * nsteps
    # Guard the seed write: numba does not bounds-check array accesses by
    # default, so writing pos[0] on an empty input would be an out-of-bounds
    # write rather than a clean IndexError.
    if pos.shape[0] > 0:
        pos[0] = nsteps
    for i in range(1, pos.shape[0]):
        if target[i] > pos[i - 1]:
            d = 1
        elif target[i] < pos[i - 1]:
            d = -1
        else:
            # At the target already, or the target is NaN: hold.
            d = 0
        pos[i] = pos[i - 1] + d
    return pos / nsteps
def smoothen(signal, nsteps):
    """Apply ``_smoothen`` to a pandas Series, keeping the original index.

    The Series' underlying values are handed to the compiled helper together
    with ``nsteps``; the resulting array is wrapped in a new Series aligned to
    ``signal.index``.
    """
    smoothed_values = _smoothen(signal.values, nsteps)
    return pd.Series(smoothed_values, index=signal.index)
# Smoothed version of the timing signal, produced by smoothen() with nsteps=90
position_smooth = smoothen(position, 90)
# Equity curve and drawdowns of the smoothed MA200 crossover
plot_dd(position_smooth * rets)
# Turnover (sum of absolute daily position changes per calendar year) is way
# down
(
position_smooth.diff().abs()
.groupby(pd.Grouper(freq='Y')).sum()
.rename(index=lambda xs: xs.year)
.plot.bar()
)
# Daily returns per strategy, now including the smoothed variant
strats = pd.DataFrame({
'beta': rets,
'ma200': (position * rets),
'ma200_slow': (position_smooth * rets),
})
# Average annual returns: exp(250 * mean(log(1 + r))) - 1
strats.add(1).apply(np.log).mean().mul(250).apply(np.exp).sub(1)
# Drawdowns: equity curve / its running maximum - 1, per strategy
dd = (
strats
.add(1).cumprod()
.transform(lambda xs: xs / xs.cummax() - 1)
)
# Worst ('min') and average ('mean') drawdown per strategy
dd_agg = dd.agg(['min', 'mean'])
print(dd_agg)
dd_agg.plot.bar()
# Drawdowns side by side
dd.plot(lw=1)
# Original signal slams on and off
position.plot()
# New signal drifts in and out
position_smooth.plot()
# Fraction of days on which each signal trades.
# diff() is NaN wherever the position is undefined (the MA200 warm-up rows and
# the very first row). NaN.ne(0) evaluates to True, so those rows must be
# dropped before the comparison or they are counted as trading days.

# The original signal (reported as trading on 4.2% of all days back when the
# NaN warm-up rows were still being counted as trades)
position.diff().dropna().ne(0).mean()
# Smoothened signal trades (a much smaller amount in aggregate) on ~30% of all days
position_smooth.diff().dropna().ne(0).mean()
# Some years as many as 80% of all days
(
    position_smooth.diff().dropna().ne(0)
    .groupby(pd.Grouper(freq='Y')).mean()
    .rename(index=lambda xs: xs.year)
    .plot.bar()
)
# Assume a 20 bps transaction cost, charged on the absolute daily change in
# position - the smoothed strategy costs at most 50 bps a year to execute
cost_smooth = position_smooth.diff().abs().mul(20. / 10000)
(
    cost_smooth
    .groupby(pd.Grouper(freq='Y')).sum()
    .rename(index=lambda xs: xs.year)
    .plot.bar()
)
# Contrast with original signal - a few percent a year!
cost = position.diff().abs().mul(20. / 10000)
(
    cost
    .groupby(pd.Grouper(freq='Y')).sum()
    .rename(index=lambda xs: xs.year)
    .plot.bar()
)
# Cumulative difference from beta: smoothed strategy net of its own trading
# costs, relative to always being invested
(
    (position_smooth * rets - cost_smooth + 1)
    .div(rets + 1)
    .cumprod().sub(1).plot()
)
# Difference from beta by year. The smoothed strategy must be charged its own
# trading cost (cost_smooth), not the cost of the unsmoothed signal.
(
    (position_smooth * rets - cost_smooth + 1)
    .div(rets + 1)
    .groupby(pd.Grouper(freq='Y')).prod()
    .sub(1)
    .rename(index=lambda xs: xs.year).plot.bar()
)
# Side by side, with each strategy net of its own transaction costs
(
    strats
    .assign(
        ma200_slow=lambda df: df.ma200_slow - cost_smooth,
        ma200=lambda df: df.ma200 - cost,
    ).add(1).cumprod().plot(logy=True)
)